# ZINB MODELS
library(data.table)
library(pscl)
library(modelsummary)
library(dplyr)
library(knitr)
df <- fread("data/monarch_panel_transformed.csv")
# center fd variables
df[, ln_bank_c := ln_bank - mean(ln_bank, na.rm = TRUE)]
df[, ln_market_c := ln_market - mean(ln_market, na.rm = TRUE)]
df[, ln_vc_c := ln_vc - mean(ln_vc, na.rm = TRUE)]
df[, ln_bank_x_ipr_c := ln_bank_c * ipr_c]
df[, ln_market_x_ipr_c := ln_market_c * ipr_c]
df[, ln_vc_x_ipr_c := ln_vc_c * ipr_c]
df[, ln_internal_patents := log(patent_count)]
if (!dir.exists("tables")) {
dir.create("tables")
}
# zinb with incremental controls
run_zinb_incremental <- function(finance_var, latex_name, title_name) {
fd_c <- paste0(finance_var, "_c")
fd_x <- paste0(finance_var, "_x_ipr_c")
controls <- c("ln_gdp_pc", "ln_trade", "ln_rd", "ln_tertiary")
models <- list()
for (i in 1:4) {
intensity_controls <- paste(controls[1:i], collapse = " + ")
formula_str <- paste0(
"breakthrough_count ~ ",
fd_c, " + ipr_c + ", fd_x,
" + ", intensity_controls,
" + offset(ln_internal_patents) | ",
"ln_gdp_pc"
)
model <- zeroinfl(
as.formula(formula_str),
data = df,
dist = "negbin"
)
models[[i]] <- model
}
# clean labels
fin_label <- switch(finance_var,
"ln_bank" = "Bank Credit",
"ln_market" = "Market Cap",
"ln_vc" = "Venture Capital")
# variable mapping and ordering
var_names <- c(
"count_(Intercept)",
"count_ipr_c",
paste0("count_", fd_c),
paste0("count_", fd_x),
"count_ln_gdp_pc",
"count_ln_trade",
"count_ln_rd",
"count_ln_tertiary",
"zero_(Intercept)",
"zero_ln_gdp_pc"
)
var_labels <- c(
"Intercept (Count)",
"IPR Strength",
fin_label,
paste0(fin_label, " $\\times$ IPR"),
"GDP per Capita",
"Trade Openness",
"R\\&D Expenditure",
"Tertiary Education",
"Intercept (Zero)",
"GDP per Capita (Selection)"
)
var_mapping <- setNames(var_labels, var_names)
# calculating log theta except this didnt work so I hardcoded from the html tables
fmt_lt <- function(x, se=FALSE) {
if(is.na(x)) return(" ")
val <- sprintf("\\num{%.3f}", x)
if(se) return(paste0("(", val, ")"))
return(val)
}
lt_est <- numeric(4)
lt_se <- numeric(4)
for(i in 1:4) {
m <- models[[i]]
if (!is.null(m$theta) && !is.null(m$SE.theta)) {
lt_est[i] <- log(m$theta)
lt_se[i] <- m$SE.theta / m$theta
} else {
lt_est[i] <- NA; lt_se[i] <- NA
}
}
row_lt_est <- paste0("Log Theta & ", paste(sapply(lt_est, fmt_lt, se=FALSE), collapse = " & "), "\\\\")
row_lt_se <- paste0(" & ", paste(sapply(lt_se, fmt_lt, se=TRUE), collapse = " & "), "\\\\")
#save LaTeX tables
modelsummary(
models,
output = latex_name,
escape = FALSE,
align = "lcccc",
stars = c("*" = .1, "**" = .05, "***" = .01),
fmt = function(x) sprintf("\\num{%.3f}", x),
statistic = "({std.error})",
coef_map = var_mapping,
gof_map = list(list("raw" = "nobs", "clean" = "Num.Obs.", "fmt" = function(x) sprintf("\\num{%d}", x))),
notes = NULL #I add these later
)
tex <- readLines(latex_name)
# clean the wrappers so it doesn't break my pdf
start_idx <- grep("\\\\begin\\{tabular\\}", tex)[1]
end_idx <- grep("\\\\end\\{tabular\\}", tex)[1]
if(!is.na(start_idx) && !is.na(end_idx)) tex <- tex[start_idx:end_idx]
#
tex <- gsub("(\\\\num\\{[^}]+\\})(\\*+)", "\\1$^{\\2}$", tex)
# make the table easier to read
idx_toprule <- grep("\\\\toprule", tex)[1]
if (!is.na(idx_toprule)) {
tex[idx_toprule + 1] <- "Intensity: Negative Binomial Count & Model 1 & Model 2 & Model 3 & Model 4\\\\"
}
#
idx_zero <- grep("Intercept \\(Zero\\)", tex)[1]
if (!is.na(idx_zero)) {
insertion <- c(
row_lt_est,
row_lt_se,
"\\midrule",
"Selection: Logit Structural Zero & Model 1 & Model 2 & Model 3 & Model 4\\\\",
"\\midrule"
)
tex <- append(tex, insertion, after = idx_zero - 1)
}
# custom notes
idx_bottomrule <- grep("\\\\bottomrule", tex)[1]
if (!is.na(idx_bottomrule)) {
notes <- c(
"\\multicolumn{5}{l}{\\footnotesize \\textit{Notes:} $^{*}p<0.10$, $^{**}p<0.05$, $^{***}p<0.01$.} \\\\",
"\\multicolumn{5}{l}{\\footnotesize Standard errors in parentheses.} \\\\"
)
tex <- append(tex, notes, after = idx_bottomrule)
}
writeLines(tex, latex_name)
# html tables
cat("\n\n")
cat("##", title_name, "\n\n")
for (j in 1:4) {
cat("### Model", j, "\n\n")
model_sum <- summary(models[[j]])
# Intensity model
count_coefs <- model_sum$coefficients$count
if (!is.na(lt_est[j])) {
count_coefs <- rbind(count_coefs, "Log Theta" = c(lt_est[j], lt_se[j], NA, NA))
}
count_table <- data.frame(
Variable = rownames(count_coefs),
Coefficient = round(count_coefs[,1], 3),
Std_Error = round(count_coefs[,2], 3)
)
cat("#### Intensity (Negative Binomial Count Equation)\n\n")
print(knitr::kable(count_table, row.names = FALSE))
cat("\n\n")
# Selection model
zero_coefs <- model_sum$coefficients$zero
zero_table <- data.frame(
Variable = rownames(zero_coefs),
Coefficient = round(zero_coefs[,1], 3),
Std_Error = round(zero_coefs[,2], 3)
)
cat("#### Selection (Logit Structural Zero Equation)\n\n")
print(knitr::kable(zero_table, row.names = FALSE))
cat("\n\n")
cat("Observations:", models[[j]]$n, "\n\n")
}
invisible(models)
}
# run mkt
zinb_market <- run_zinb_incremental(
"ln_market",
"tables/zinb_market2.tex",
"ZINB: Market Finance and Breakthrough Innovation"
)